# Load the state-level accident table; text columns are read as factors.
df <- read.csv(
  file = "C:\\Users\\DELL\\Downloads\\accidents_18.csv",
  header = TRUE,
  stringsAsFactors = TRUE
)
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.0.5
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.0.4
library(plotly)
## Warning: package 'plotly' was built under R version 4.0.5
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
Data is taken from
From this dataset we are interested only in the Accidents and Population (2018) figures for all STATES/UTs, which are further grouped into REGIONS. ### data manipulation
# Restrict df to the columns used in the state-level analysis and add the
# accident count per lakh (100,000) of 2018 population.
accidents1 <- df %>%
  select(Region, States_UT, Population_2018, Accidents) %>%
  mutate(per_lakh = (Accidents / Population_2018) * 100000)

# Accident count against population, with a fitted linear trend line.
acc_point <- ggplot(data = accidents1, mapping = aes(x = Accidents, y = Population_2018)) +
  geom_point(size = 6, color = "red") +
  geom_smooth(method = lm) +
  labs(
    title = "Accidents vs Population(2018)",
    x = "ACCIDENTS",
    y = "POPULATION"
  ) +
  theme_classic()
plot(acc_point)
## `geom_smooth()` using formula 'y ~ x'

# Pearson correlation between population and accident count.
with(accidents1, cor(Population_2018, Accidents, method = "pearson"))
## [1] 0.9136955
* The above scatter plot shows that there is a linear relationship between Population and Accidents. * As the population increases, the number of accidents increases too. * Therefore we cannot conclude that the STATE WITH THE MOST ACCIDENTS IS THE STATE WITH THE HIGHEST ACCIDENT RATE. * Instead, we use accidents per one lakh population to find the state with the highest accident rate.
# Accident count against the per-lakh accident rate; geom_smooth() is left on
# its default smoother (loess here, as the message below records).
acc_point_1 <- ggplot(data = accidents1, mapping = aes(x = Accidents, y = per_lakh)) +
  geom_point(size = 6, color = "red") +
  geom_smooth() +
  labs(
    title = "ACCIDENTS per one lakh population",
    x = "ACCIDENTS",
    y = "PER LAKH"
  ) +
  theme_light()
plot(acc_point_1)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Pearson correlation between accident count and per-lakh rate.
with(accidents1, cor(Accidents, per_lakh, method = "pearson"))
## [1] 0.4034025
# Histogram of accident counts with mean (blue) and median (green) markers.
# The reference lines use constant colour/width, so these are set OUTSIDE
# aes(). Inside aes() the strings would be treated as data and mapped through
# a colour scale, giving the wrong colours and a spurious legend.
acc_hist <- ggplot(accidents1, aes(x = Accidents)) +
  geom_histogram(bins = 20, color = "red", fill = "orange", linetype = "dashed") +
  geom_vline(xintercept = mean(accidents1$Accidents), color = "blue", lwd = 1) +
  geom_vline(xintercept = median(accidents1$Accidents), color = "green", lwd = 1) +
  ggtitle("Histogram for Accidents") +
  xlab("Accidents") +
  ylab("Count") +
  theme_dark()
plot(acc_hist)
summary(accidents1$Accidents)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0 20.0 473.5 1127.2 1477.8 7179.0

# Histogram of the per-lakh accident rate with mean (green, thicker) and
# median (orange) markers; colours are again set as constants, not mapped.
acc_hist_pop <- ggplot(accidents1, aes(x = per_lakh)) +
  geom_histogram(bins = 20, color = "red") +
  geom_vline(xintercept = mean(accidents1$per_lakh), color = "green", lwd = 2) +
  geom_vline(xintercept = median(accidents1$per_lakh), color = "orange") +
  ggtitle("Histogram for accidents per lakh population") +
  xlab("PER LAKH POPULATION") +
  ylab("Count") +
  theme_dark()
plot(acc_hist_pop)
summary(accidents1$per_lakh)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000 1.220 2.723 2.820 4.184 7.370
# Region-wise boxplot of accident counts; outliers are drawn as red triangles
# and the regional mean is overlaid as a steel-blue point.
reg_acc_box <- ggplot(data = accidents1, mapping = aes(x = Region, y = Accidents, color = Region)) +
  geom_boxplot(outlier.size = 3, outlier.shape = 2, outlier.color = "red") +
  stat_summary(geom = "point", fun = mean, color = "steelblue", size = 3) +
  labs(
    title = "Boxplot for accidents in every Region",
    x = "REGION",
    y = "ACCIDENTS"
  ) +
  theme_classic()
plot(reg_acc_box)

# Rows whose accident count equals one of the values base boxplot() flags as
# an outlier (boxplot() also draws its own plot as a side effect).
accidents1[
  accidents1$Accidents %in%
    boxplot(accidents1$Accidents ~ accidents1$Region)$out,
]
## Region States_UT Population_2018 Accidents per_lakh
## 3 northeast Assam 31205576 681 2.182302
## 32 UT Dadra & Nagar Haveli 343709 22 6.400763

# Same boxplot, this time for the per-lakh accident rate.
reg_perlakh_box <- ggplot(data = accidents1, mapping = aes(x = Region, y = per_lakh, color = Region)) +
  geom_boxplot(outlier.size = 3, outlier.shape = 2, outlier.color = "red") +
  stat_summary(geom = "point", fun = mean, color = "steelblue", size = 3) +
  labs(
    title = "Boxplot for accidents per lakh population in every Region",
    x = "REGION",
    y = "per lakh pop."
  ) +
  theme_classic()
plot(reg_perlakh_box)

# Rows whose per-lakh rate matches a value flagged as an outlier by boxplot().
accidents1[
  accidents1$per_lakh %in%
    boxplot(accidents1$per_lakh ~ accidents1$Region)$out,
]
## Region States_UT Population_2018 Accidents per_lakh
## 23 northeast Sikkim 610577 45 7.370078
## 32 UT Dadra & Nagar Haveli 343709 22 6.400763
# Ten States/UTs with the highest per-lakh accident rate, highest first.
top_10_states_acc <- accidents1 %>%
  top_n(10, per_lakh) %>%
  arrange(desc(per_lakh))

# Row order does not control bar order on a discrete axis (bars would come out
# alphabetical), so the x variable is explicitly reordered by descending rate.
top10_acc_plt <- ggplot(
  top_10_states_acc,
  aes(x = reorder(States_UT, -per_lakh), y = per_lakh)
) +
  geom_bar(stat = "identity", color = "red", fill = "orange", linetype = "dashed") +
  ggtitle("TOP 10 STATES WITH HIGH ACCIDENTS RATE") +
  xlab("STATES/UT") +
  ylab("PER LAKH POPULATION") +
  theme_dark()
plot(top10_acc_plt)
* The above bar chart shows the ten States/UTs with the highest accident rates.
# Ten States/UTs with the lowest non-zero per-lakh accident rate.
# Bars are ordered by ascending rate via reorder(); sorting the rows alone
# would leave the discrete x axis in alphabetical order.
bottom_10_states_acc <- accidents1 %>%
  filter(per_lakh != 0) %>%
  top_n(-10, per_lakh) %>%
  ggplot(aes(x = reorder(States_UT, per_lakh), y = per_lakh)) +
  geom_bar(stat = "identity", color = "red", fill = "orange", linetype = "dashed") +
  ggtitle("TOP 10 STATES WITH LOW ACCIDENTS RATE") +
  xlab("STATES/UT") +
  ylab("PER LAKH POPULATION") +
  theme_dark()
plot(bottom_10_states_acc)
* From the above bar charts we conclude that MEGHALAYA has the lowest (best) accident rate among the states, while SIKKIM has the highest (worst) accident rate in INDIA.
# Take columns 2-15 of df, drop the 3rd and 4th of that selection, and add,
# for each violation type, the number of deaths per 10 accidents.
violating_rules <- df %>%
  select(2:15) %>%
  select(-(3:4)) %>%
  mutate(
    overspeed_kill_per10_accidents = (Overspeed_kill / Overspeed_accidents) * 10,
    drunken_drive_kill_per10_accidents = (Drunkendrive_kill / Drunkendrive_accidents) * 10,
    wrongside_kill_per_10_accidents = (Wrongside_kill / Wrongside_accidents) * 10,
    redsignal_kill_per10_accidents = (Redsignal_kill / Redsignal_accidents) * 10,
    mobile_kill_per10_accidents = (Mobile_kill / Mobile_accidents) * 10
  )

# Totals across the five violation types, reduced to the summary columns and
# left grouped by Region.
violations <- violating_rules %>%
  mutate(
    total_accidents = Overspeed_accidents + Drunkendrive_accidents +
      Wrongside_accidents + Redsignal_accidents + Mobile_accidents,
    total_kills = Overspeed_kill + Drunkendrive_kill +
      Wrongside_kill + Redsignal_kill + Mobile_kill,
    per10_accidents = overspeed_kill_per10_accidents + drunken_drive_kill_per10_accidents +
      wrongside_kill_per_10_accidents + redsignal_kill_per10_accidents + mobile_kill_per10_accidents
  ) %>%
  select(Region, total_accidents, total_kills, per10_accidents) %>%
  group_by(Region)

# Regional average of the combined rate over rows with a positive value,
# divided by 10.
# NOTE(review): the extra division by 10 makes this a per-accident figure
# despite the column name - confirm the intended unit.
violating_accidents <- violations %>%
  select(Region, per10_accidents) %>%
  filter(per10_accidents > 0) %>%
  group_by(Region) %>%
  summarise(KILL_PER10_ACCIDENTS = (sum(per10_accidents) / n()) / 10)
data.frame(violating_accidents)
## Region KILL_PER10_ACCIDENTS
## 1 central 1.4862521
## 2 east 3.1587052
## 3 north 2.2632494
## 4 northeast 1.8069869
## 5 south 0.8661236
## 6 UT 1.0463237
## 7 west 1.5321885

# Region-wise bar chart of the summary figure.
violation_bar <- violating_accidents %>%
  plot_ly(x = ~Region) %>%
  add_trace(y = ~KILL_PER10_ACCIDENTS, name = "ACCIDENTS (REGION_WISE)", type = "bar")
violation_bar

# The same figure shown as regional shares.
violation_pie <- plot_ly(
  violating_accidents,
  labels = ~Region,
  values = ~KILL_PER10_ACCIDENTS,
  type = "pie",
  textposition = "inside",
  textinfo = "label+percent"
)
violation_pie
# Load the per-violation table and treat Region as a categorical variable.
violation <- read.csv("C:\\Users\\DELL\\Downloads\\violation.csv", header = TRUE)
violation$Region <- factor(violation$Region)

# Overspeed subset (Region, state and the overspeed columns) plus the number
# of deaths per 10 overspeed accidents.
overspeed <- violation %>%
  select(2:8) %>%
  mutate(kill_per_overspeed_acc = (over_kill / over_acc) * 10)

## histogram
# Mean (blue) and median (green) markers. Colour and width are constants and
# therefore set outside aes(); inside aes() they would be mapped through
# scales, producing wrong colours and an unwanted legend.
overspeed_hist <- ggplot(overspeed, aes(x = over_acc)) +
  geom_histogram(bins = 30, color = "red", fill = "orange", linetype = "dashed") +
  geom_vline(xintercept = mean(overspeed$over_acc), color = "blue", lwd = 1) +
  geom_vline(xintercept = median(overspeed$over_acc), color = "green", lwd = 1) +
  ggtitle("Histogram for overspeed accidents") +
  xlab("Accidents") +
  ylab("Count") +
  theme_minimal()
plot(overspeed_hist)
summary(overspeed$over_acc)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.0 186.2 2965.0 8628.1 13070.5 46113.0

## outlier detection using boxplot
overspeed_boxplot <- ggplot(overspeed, aes(x = Region, y = over_acc, color = Region)) +
  geom_boxplot(outlier.color = "red", outlier.shape = 2, outlier.size = 3) +
  stat_summary(fun = mean, geom = "point", size = 3, color = "steelblue") +
  theme_classic() +
  ggtitle("Boxplot for overspeed accidents in every Region") +
  xlab("REGION") +
  ylab("Count")
plot(overspeed_boxplot)
# Rows whose value matches one flagged as an outlier by base boxplot()
# (which also draws its own plot as a side effect).
subset(
  overspeed,
  overspeed$over_acc %in% boxplot(overspeed$over_acc ~ overspeed$Region)$out
)
## Region States_UTs over_acc over_kill over_gre_inj over_min_inj
## 3 northeast Assam 2532 995 1264 172
## 22 north Rajasthan 20132 9618 6160 13735
## 34 UT Delhi 2866 748 401 2217
## over_tot_inj kill_per_overspeed_acc
## 3 1436 3.929700
## 22 19895 4.777469
## 34 2618 2.609909

## plot
overspeed_point <- ggplot(overspeed, aes(x = over_acc, y = over_kill)) +
  geom_point(color = "red", size = 6) +
  geom_smooth(method = lm) +
  ggtitle("Scatter Plot for accidents") +
  xlab("ACCIDENTS") +
  ylab("KILL") +
  theme_classic()
plot(overspeed_point)
## `geom_smooth()` using formula 'y ~ x'
cor(overspeed$over_acc, overspeed$over_kill, method = "pearson")
## [1] 0.8647379
cor(overspeed$over_tot_inj, overspeed$over_gre_inj, method = "pearson")
## [1] 0.6523911
cor(overspeed$over_tot_inj, overspeed$over_min_inj, method = "pearson")
## [1] 0.9426355

## barplot
overspeed_injuries <- plot_ly(overspeed, x = ~States_UTs) %>%
  add_trace(y = ~over_gre_inj, name = "Greviously", type = "bar") %>%
  add_trace(y = ~over_min_inj, name = "Minor", type = "bar") %>%
  add_trace(y = ~over_tot_inj, name = "Total Injury", type = "bar")
overspeed_injuries
overspeed_injuries_region <- plot_ly(overspeed, x = ~Region) %>%
  add_trace(y = ~over_gre_inj, name = "Greviously", type = "bar") %>%
  add_trace(y = ~over_min_inj, name = "Minor", type = "bar") %>%
  add_trace(y = ~over_tot_inj, name = "Total Injury", type = "bar")
overspeed_injuries_region

# Ten highest kill rates (rows where kills equal accidents are excluded).
# reorder() ranks the bars by descending rate; sorting rows with arrange()
# has no effect on a discrete axis, which would otherwise be alphabetical.
top10_overspeed_kill <- overspeed %>%
  filter(over_acc != over_kill) %>%
  top_n(10, kill_per_overspeed_acc) %>%
  ggplot(aes(x = reorder(States_UTs, -kill_per_overspeed_acc), y = kill_per_overspeed_acc)) +
  geom_bar(stat = "identity", color = "red", fill = "orange", linetype = "dashed") +
  ggtitle("TOP 10 STATES WITH HIGH KILLS DUE TO overspeed") +
  xlab("STATES/UT") +
  ylab("overspeed KILLS") +
  theme_dark()
plot(top10_overspeed_kill)

# Ten lowest kill rates, bars again ranked by descending rate.
bottom10_overspeed_kill <- overspeed %>%
  filter(over_acc != over_kill) %>%
  top_n(-10, kill_per_overspeed_acc) %>%
  ggplot(aes(x = reorder(States_UTs, -kill_per_overspeed_acc), y = kill_per_overspeed_acc)) +
  geom_bar(stat = "identity", color = "red", fill = "orange", linetype = "dashed") +
  ggtitle("TOP 10 STATES WITH LOW KILLS DUE TO overspeed") +
  xlab("STATES/UT") +
  ylab("overspeed KILLS") +
  theme_dark()
plot(bottom10_overspeed_kill)
* The highest kill rate due to overspeeding is found in MIZORAM and the lowest in ANDAMAN & NICOBAR ISLANDS. * States like PUNJAB & BIHAR are advised to give this problem close attention in order to reduce the number of deaths.
# Regional average of the state-level overspeed kill rate, divided by 10.
# NOTE(review): the division by 10 turns the per-10-accidents rate into a
# per-accident figure despite the column name - confirm the intended unit.
overspeed_result <- overspeed %>%
  select(Region, kill_per_overspeed_acc) %>%
  group_by(Region) %>%
  summarise(kill_per10_overspeed_accidents = (sum(kill_per_overspeed_acc) / n()) / 10)
data.frame(overspeed_result)
## Region kill_per10_overspeed_accidents
## 1 central 0.4237846
## 2 east 0.6071363
## 3 north 0.4446250
## 4 northeast 0.4479920
## 5 south 0.2433911
## 6 UT 0.4218447
## 7 west 0.2990527

# Region-wise bar chart of the averaged rate.
overspeed_bar <- overspeed_result %>%
  plot_ly(x = ~Region) %>%
  add_trace(
    y = ~kill_per10_overspeed_accidents,
    name = " OVERSPEED ACCIDENTS (REGION_WISE)",
    type = "bar"
  )
overspeed_bar

# The same figure shown as regional shares.
overspeed_pie <- plot_ly(
  overspeed_result,
  labels = ~Region,
  values = ~kill_per10_overspeed_accidents,
  type = "pie",
  textposition = "inside",
  textinfo = "label+percent"
)
overspeed_pie
# Drunken-drive subset: columns 2-13 of violation minus positions 3-7 of that
# selection, plus the number of deaths per 10 drunken-drive accidents.
drunkendrive <- violation %>%
  select(2:13) %>%
  select(-(3:7)) %>%
  mutate(kill_per_drunkendrive_acc = (drunk_kill / drunk_acc) * 10)

## histogram
# Mean (blue) and median (green) markers. Colour and width are constants and
# therefore set outside aes(); inside aes() they would be mapped through
# scales, producing wrong colours and an unwanted legend.
drunkendrive_hist <- ggplot(drunkendrive, aes(x = drunk_acc)) +
  geom_histogram(bins = 30, color = "red", fill = "orange", linetype = "dashed") +
  geom_vline(xintercept = mean(drunkendrive$drunk_acc), color = "blue", lwd = 1) +
  geom_vline(xintercept = median(drunkendrive$drunk_acc), color = "green", lwd = 1) +
  ggtitle("Histogram for drunkendrive accidents") +
  xlab("Accidents") +
  ylab("Count") +
  theme_dark()
plot(drunkendrive_hist)
summary(drunkendrive$drunk_acc)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0 19.0 109.0 333.8 324.8 3595.0

## outlier detection using boxplot
drunkendrive_boxplot <- ggplot(drunkendrive, aes(x = Region, y = drunk_acc, color = Region)) +
  geom_boxplot(outlier.color = "red", outlier.shape = 2, outlier.size = 3) +
  stat_summary(fun = mean, geom = "point", size = 3, color = "steelblue") +
  theme_classic() +
  ggtitle("Boxplot for drunkendrive accidents in every Region") +
  xlab("REGION") +
  ylab("Count")
plot(drunkendrive_boxplot)
# Rows whose value matches one flagged as an outlier by base boxplot()
# (which also draws its own plot as a side effect).
subset(
  drunkendrive,
  drunkendrive$drunk_acc %in% boxplot(drunkendrive$drunk_acc ~ drunkendrive$Region)$out
)
## Region States_UTs drunk_acc drunk_kill drunk_gre_inj drunk_min_inj
## 3 northeast Assam 377 130 270 85
## 34 UT Delhi 333 72 30 262
## drunk_tot_inj kill_per_drunkendrive_acc
## 3 355 3.448276
## 34 292 2.162162

## plot
drunkendrive_point <- ggplot(drunkendrive, aes(x = drunk_acc, y = drunk_kill)) +
  geom_point(color = "red", size = 6) +
  geom_smooth(method = lm) +
  ggtitle("Scatter Plot for accidents") +
  xlab("ACCIDENTS") +
  ylab("KILL") +
  theme_classic()
plot(drunkendrive_point)
## `geom_smooth()` using formula 'y ~ x'
cor(drunkendrive$drunk_acc, drunkendrive$drunk_kill, method = "pearson")
## [1] 0.9314265

## barplot
drunkendrive_injuries <- plot_ly(drunkendrive, x = ~States_UTs) %>%
  add_trace(y = ~drunk_gre_inj, name = "Greviously", type = "bar") %>%
  add_trace(y = ~drunk_min_inj, name = "Minor", type = "bar") %>%
  add_trace(y = ~drunk_tot_inj, name = "Total Injury", type = "bar")
drunkendrive_injuries
drunkendrive_injuries_region <- plot_ly(drunkendrive, x = ~Region) %>%
  add_trace(y = ~drunk_gre_inj, name = "Greviously", type = "bar") %>%
  add_trace(y = ~drunk_min_inj, name = "Minor", type = "bar") %>%
  add_trace(y = ~drunk_tot_inj, name = "Total Injury", type = "bar")
drunkendrive_injuries_region

# Ten highest kill rates (rows where kills equal accidents are excluded).
# reorder() ranks the bars by descending rate; sorting rows with arrange()
# has no effect on a discrete axis, which would otherwise be alphabetical.
top10_drunkendrive_kill <- drunkendrive %>%
  filter(drunk_acc != drunk_kill) %>%
  top_n(10, kill_per_drunkendrive_acc) %>%
  ggplot(aes(x = reorder(States_UTs, -kill_per_drunkendrive_acc), y = kill_per_drunkendrive_acc)) +
  geom_bar(stat = "identity", color = "red", fill = "orange", linetype = "dashed") +
  ggtitle("TOP 10 STATES WITH HIGH KILLS DUE TO drunkendrive") +
  xlab("STATES/UT") +
  ylab("Drunkendrive KILLS") +
  theme_dark()
plot(top10_drunkendrive_kill)

# Ten lowest kill rates among rows with positive accidents and kills and
# fewer kills than accidents; bars ranked by descending rate.
bottom10_drunkendrive_kill <- drunkendrive %>%
  filter(
    drunk_acc != drunk_kill & drunk_acc > 0 & drunk_kill > 0 &
      drunk_acc > drunk_kill
  ) %>%
  top_n(-10, kill_per_drunkendrive_acc) %>%
  ggplot(aes(x = reorder(States_UTs, -kill_per_drunkendrive_acc), y = kill_per_drunkendrive_acc)) +
  geom_bar(stat = "identity", color = "red", fill = "orange", linetype = "dashed") +
  ggtitle("TOP 10 STATES WITH LOW KILLS DUE TO drunkendrive") +
  xlab("STATES/UT") +
  ylab("Drunkendrive KILLS") +
  theme_dark()
plot(bottom10_drunkendrive_kill)
* We conclude that UTTARAKHAND & MIZORAM face more deaths due to drunken driving, while ANDHRA PRADESH & WEST BENGAL are good at controlling drunken-driving accidents. * The central region faces more accidents and, since there is a linear relationship between accidents & kills, the same region faces more INJURIES too.
# Regional average of the positive state-level drunken-drive kill rates,
# divided by 10.
# NOTE(review): the division by 10 turns the per-10-accidents rate into a
# per-accident figure despite the column name - confirm the intended unit.
drunkendrive_result <- drunkendrive %>%
  select(Region, kill_per_drunkendrive_acc) %>%
  filter(kill_per_drunkendrive_acc > 0) %>%
  group_by(Region) %>%
  summarise(kill_per10_drunkendrive_accidents = (sum(kill_per_drunkendrive_acc) / n()) / 10)
data.frame(drunkendrive_result)
## Region kill_per10_drunkendrive_accidents
## 1 central 0.4310937
## 2 east 0.3663242
## 3 north 0.4224040
## 4 northeast 0.5689041
## 5 south 0.1424085
## 6 UT 0.1795367
## 7 west 0.2697860

# Region-wise bar chart of the averaged rate.
drunkendrive_bar <- drunkendrive_result %>%
  plot_ly(x = ~Region) %>%
  add_trace(
    y = ~kill_per10_drunkendrive_accidents,
    name = " DRUNKENDRIVE ACCIDENTS (REGION_WISE)",
    type = "bar"
  )
drunkendrive_bar

# The same figure shown as regional shares.
drunkendrive_pie <- plot_ly(
  drunkendrive_result,
  labels = ~Region,
  values = ~kill_per10_drunkendrive_accidents,
  type = "pie",
  textposition = "inside",
  textinfo = "label+percent"
)
drunkendrive_pie
# Wrong-side subset: columns 2-18 of violation minus positions 3-12 of that
# selection, plus the number of deaths per 10 wrong-side accidents.
wrongside <- violation %>%
  select(2:18) %>%
  select(-(3:12)) %>%
  mutate(kill_per_wrongside_acc = (wrongside_kill / wrongside_acc) * 10)

## histogram
# Mean (blue) and median (green) markers. Colour and width are constants and
# therefore set outside aes(); inside aes() they would be mapped through
# scales, producing wrong colours and an unwanted legend.
wrongside_hist <- ggplot(wrongside, aes(x = wrongside_acc)) +
  geom_histogram(bins = 30, color = "red", fill = "orange", linetype = "dashed") +
  geom_vline(xintercept = mean(wrongside$wrongside_acc), color = "blue", lwd = 1) +
  geom_vline(xintercept = median(wrongside$wrongside_acc), color = "green", lwd = 1) +
  ggtitle("Histogram for Wrongside accidents") +
  xlab("Accidents") +
  ylab("Count") +
  theme_dark()
plot(wrongside_hist)
summary(wrongside$wrongside_acc)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.0 22.0 235.0 688.4 1055.0 4572.0

## outlier detection using boxplot
wrongside_boxplot <- ggplot(wrongside, aes(x = Region, y = wrongside_acc, color = Region)) +
  geom_boxplot(outlier.color = "red", outlier.shape = 2, outlier.size = 3) +
  stat_summary(fun = mean, geom = "point", size = 3, color = "steelblue") +
  theme_classic() +
  ggtitle("Boxplot for wrongside accidents in every Region") +
  xlab("REGION") +
  ylab("Count")
plot(wrongside_boxplot)
# Rows whose value matches one flagged as an outlier by base boxplot()
# (which also draws its own plot as a side effect).
subset(
  wrongside,
  wrongside$wrongside_acc %in% boxplot(wrongside$wrongside_acc ~ wrongside$Region)$out
)
## Region States_UTs wrongside_acc wrongside_kill wrongside_gre_inj
## 3 northeast Assam 1448 482 858
## wrongside_min_inj wrongside_tot_inj kill_per_wrongside_acc
## 3 330 1188 3.328729

## plot
wrongside_point <- ggplot(wrongside, aes(x = wrongside_acc, y = wrongside_kill)) +
  geom_point(color = "red", size = 6) +
  geom_smooth(method = lm) +
  ggtitle("Scatter Plot for accidents") +
  xlab("ACCIDENTS") +
  ylab("KILL") +
  theme_classic()
plot(wrongside_point)
## `geom_smooth()` using formula 'y ~ x'
cor(wrongside$wrongside_acc, wrongside$wrongside_kill, method = "pearson")
## [1] 0.8914849

## barplot
wrongside_injuries <- plot_ly(wrongside, x = ~States_UTs) %>%
  add_trace(y = ~wrongside_gre_inj, name = "Greviously", type = "bar") %>%
  add_trace(y = ~wrongside_min_inj, name = "Minor", type = "bar") %>%
  add_trace(y = ~wrongside_tot_inj, name = "Total Injury", type = "bar")
wrongside_injuries
wrongside_injuries_region <- plot_ly(wrongside, x = ~Region) %>%
  add_trace(y = ~wrongside_gre_inj, name = "Greviously", type = "bar") %>%
  add_trace(y = ~wrongside_min_inj, name = "Minor", type = "bar") %>%
  add_trace(y = ~wrongside_tot_inj, name = "Total Injury", type = "bar")
wrongside_injuries_region

# Ten highest kill rates (rows where kills equal accidents are excluded).
# reorder() ranks the bars by descending rate; sorting rows with arrange()
# has no effect on a discrete axis, which would otherwise be alphabetical.
top10_wrongside_kill <- wrongside %>%
  filter(wrongside_acc != wrongside_kill) %>%
  top_n(10, kill_per_wrongside_acc) %>%
  ggplot(aes(x = reorder(States_UTs, -kill_per_wrongside_acc), y = kill_per_wrongside_acc)) +
  geom_bar(stat = "identity", color = "red", fill = "orange", linetype = "dashed") +
  ggtitle("TOP 10 STATES WITH HIGH KILLS DUE TO Wrongside") +
  xlab("STATES/UT") +
  ylab("Wrongside KILLS") +
  theme_dark()
plot(top10_wrongside_kill)

# Ten lowest kill rates among rows with positive accidents and kills and
# fewer kills than accidents; bars ranked by descending rate.
bottom10_wrongside_kill <- wrongside %>%
  filter(
    wrongside_acc != wrongside_kill & wrongside_acc > 0 & wrongside_kill > 0 &
      wrongside_acc > wrongside_kill
  ) %>%
  top_n(-10, kill_per_wrongside_acc) %>%
  ggplot(aes(x = reorder(States_UTs, -kill_per_wrongside_acc), y = kill_per_wrongside_acc)) +
  geom_bar(stat = "identity", color = "red", fill = "orange", linetype = "dashed") +
  ggtitle("TOP 10 STATES WITH LOW KILLS DUE TO wrongside") +
  xlab("STATES/UT") +
  ylab("wrongside KILLS") +
  theme_dark()
plot(bottom10_wrongside_kill)
* States like TAMIL NADU, UTTAR PRADESH & MADHYA PRADESH face a larger number of accidents due to wrong-side driving. However, as the scatter plot shows, although the accidents are large in number the kills increase only slightly, and this is reflected in the injuries. * The number of accidents in TAMIL NADU & MADHYA PRADESH is high, but the reassuring part is that most of them turn out to be MINOR ACCIDENTS. * This is not the case for UTTAR PRADESH. * We also found that most of the states have a low number of kills due to WRONGSIDE DRIVING. Here the highest kill rate is in the union territory of DADRA & NAGAR HAVELI and the lowest is in the state of GOA.
# Regional average of the positive state-level wrong-side kill rates,
# divided by 10.
# NOTE(review): the division by 10 turns the per-10-accidents rate into a
# per-accident figure despite the column name - confirm the intended unit.
wrongside_result <- wrongside %>%
  select(Region, kill_per_wrongside_acc) %>%
  filter(kill_per_wrongside_acc > 0) %>%
  group_by(Region) %>%
  summarise(kill_per10_wrongside_accidents = (sum(kill_per_wrongside_acc) / n()) / 10)
data.frame(wrongside_result)
## Region kill_per10_wrongside_accidents
## 1 central 0.4122998
## 2 east 0.5664040
## 3 north 0.4243812
## 4 northeast 0.3721009
## 5 south 0.1823934
## 6 UT 0.7851541
## 7 west 0.2552457

# Region-wise bar chart of the averaged rate.
wrongside_bar <- wrongside_result %>%
  plot_ly(x = ~Region) %>%
  add_trace(
    y = ~kill_per10_wrongside_accidents,
    name = " WRONGSIDE DRIVING ACCIDENTS (REGION_WISE)",
    type = "bar"
  )
wrongside_bar

# The same figure shown as regional shares.
wrongside_pie <- plot_ly(
  wrongside_result,
  labels = ~Region,
  values = ~kill_per10_wrongside_accidents,
  type = "pie",
  textposition = "inside",
  textinfo = "label+percent"
)
wrongside_pie
# Red-signal subset: columns 2-23 of violation minus positions 3-17 of that
# selection, plus the number of deaths per 10 red-signal accidents.
redsignal <- violation %>%
  select(2:23) %>%
  select(-(3:17)) %>%
  mutate(kill_per_redsignal_acc = (redsig_kill / redsig_acc) * 10)

## histogram
# Mean (blue) and median (green) markers. Colour and width are constants and
# therefore set outside aes(); inside aes() they would be mapped through
# scales, producing wrong colours and an unwanted legend.
redsignal_hist <- ggplot(redsignal, aes(x = redsig_acc)) +
  geom_histogram(bins = 30, color = "red", fill = "orange", linetype = "dashed") +
  geom_vline(xintercept = mean(redsignal$redsig_acc), color = "blue", lwd = 1) +
  geom_vline(xintercept = median(redsignal$redsig_acc), color = "green", lwd = 1) +
  ggtitle("Histogram for jumping redsignal accidents") +
  xlab("Accidents") +
  ylab("Count") +
  theme_dark()
plot(redsignal_hist)
summary(redsignal$redsig_acc)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0 0.0 41.5 123.4 111.2 939.0

## outlier detection using boxplot
redsignal_boxplot <- ggplot(redsignal, aes(x = Region, y = redsig_acc, color = Region)) +
  geom_boxplot(outlier.color = "red", outlier.shape = 2, outlier.size = 3) +
  stat_summary(fun = mean, geom = "point", size = 3, color = "steelblue") +
  theme_classic() +
  ggtitle("Boxplot for jumping redsignal accidents in every Region") +
  xlab("REGION") +
  ylab("Count")
plot(redsignal_boxplot)
# Rows whose value matches one flagged as an outlier by base boxplot()
# (which also draws its own plot as a side effect).
subset(
  redsignal,
  redsignal$redsig_acc %in% boxplot(redsignal$redsig_acc ~ redsignal$Region)$out
)
## Region States_UTs redsig_acc redsig_kill redsig_gre_inj redsig_min_inj
## 3 northeast Assam 159 62 120 29
## 24 south Tamil Nadu 939 290 117 882
## 34 UT Delhi 658 176 76 507
## redsig_tot_inj kill_per_redsignal_acc
## 3 149 3.899371
## 24 999 3.088392
## 34 583 2.674772

## plot
redsignal_point <- ggplot(redsignal, aes(x = redsig_acc, y = redsig_kill)) +
  geom_point(color = "red", size = 6) +
  geom_smooth(method = lm) +
  ggtitle("Scatter Plot for accidents") +
  xlab("ACCIDENTS") +
  ylab("KILL") +
  theme_classic()
plot(redsignal_point)
## `geom_smooth()` using formula 'y ~ x'
cor(redsignal$redsig_acc, redsignal$redsig_kill, method = "pearson")
## [1] 0.9581153

## barplot
redsignal_injuries <- plot_ly(redsignal, x = ~States_UTs) %>%
  add_trace(y = ~redsig_gre_inj, name = "Greviously", type = "bar") %>%
  add_trace(y = ~redsig_min_inj, name = "Minor", type = "bar") %>%
  add_trace(y = ~redsig_tot_inj, name = "Total Injury", type = "bar")
redsignal_injuries
redsignal_injuries_region <- plot_ly(redsignal, x = ~Region) %>%
  add_trace(y = ~redsig_gre_inj, name = "Greviously", type = "bar") %>%
  add_trace(y = ~redsig_min_inj, name = "Minor", type = "bar") %>%
  add_trace(y = ~redsig_tot_inj, name = "Total Injury", type = "bar")
redsignal_injuries_region

# Ten highest kill rates (rows where kills equal accidents are excluded).
# reorder() ranks the bars by descending rate; sorting rows with arrange()
# has no effect on a discrete axis, which would otherwise be alphabetical.
top10_redsignal_kill <- redsignal %>%
  filter(redsig_acc != redsig_kill) %>%
  top_n(10, kill_per_redsignal_acc) %>%
  ggplot(aes(x = reorder(States_UTs, -kill_per_redsignal_acc), y = kill_per_redsignal_acc)) +
  geom_bar(stat = "identity", color = "red", fill = "orange", linetype = "dashed") +
  ggtitle("TOP 10 STATES WITH HIGH KILLS DUE TO Jumping Redsignal") +
  xlab("STATES/UT") +
  ylab("Redsignal KILLS") +
  theme_dark()
plot(top10_redsignal_kill)

# Ten lowest kill rates among rows with positive accidents and kills and
# fewer kills than accidents; bars ranked by descending rate.
bottom10_redsignal_kill <- redsignal %>%
  filter(
    redsig_acc != redsig_kill & redsig_acc > 0 & redsig_kill > 0 &
      redsig_acc > redsig_kill
  ) %>%
  top_n(-10, kill_per_redsignal_acc) %>%
  ggplot(aes(x = reorder(States_UTs, -kill_per_redsignal_acc), y = kill_per_redsignal_acc)) +
  geom_bar(stat = "identity", color = "red", fill = "orange", linetype = "dashed") +
  ggtitle("TOP 10 STATES WITH LOW KILLS DUE TO Redsignal") +
  xlab("STATES/UT") +
  ylab("Redsignal KILLS") +
  theme_dark()
plot(bottom10_redsignal_kill)
* In TAMIL NADU there has been a larger number of accidents due to jumping red signals, but most of them were minor. * Due to the high number of accidents in TAMIL NADU, South India appears to be the region with the highest percentage of cases. * But it is CENTRAL INDIA that causes a lot of accidents by jumping red signals.
# Regional average of the positive state-level red-signal kill rates,
# divided by 10.
# NOTE(review): the division by 10 turns the per-10-accidents rate into a
# per-accident figure despite the column name - confirm the intended unit.
redsignal_result <- redsignal %>%
  select(Region, kill_per_redsignal_acc) %>%
  filter(kill_per_redsignal_acc > 0) %>%
  group_by(Region) %>%
  summarise(kill_per10_redsignal_accidents = (sum(kill_per_redsignal_acc) / n()) / 10)
data.frame(redsignal_result)
## Region kill_per10_redsignal_accidents
## 1 central 0.2700378
## 2 east 0.5403756
## 3 north 0.4376152
## 4 northeast 0.4466457
## 5 south 0.1298038
## 6 UT 0.1732170
## 7 west 0.2280415

# Region-wise bar chart of the averaged rate.
redsignal_bar <- redsignal_result %>%
  plot_ly(x = ~Region) %>%
  add_trace(
    y = ~kill_per10_redsignal_accidents,
    name = " JUMPING REDSIGNAL ACCIDENTS (REGION_WISE)",
    type = "bar"
  )
redsignal_bar

# The same figure shown as regional shares.
redsignal_pie <- plot_ly(
  redsignal_result,
  labels = ~Region,
  values = ~kill_per10_redsignal_accidents,
  type = "pie",
  textposition = "inside",
  textinfo = "label+percent"
)
redsignal_pie
# Mobile-use subset: columns 2-28 of violation minus positions 3-22 of that
# selection, plus the number of deaths per 10 mobile-related accidents.
mobile <- violation %>%
  select(2:28) %>%
  select(-(3:22)) %>%
  mutate(kill_per_mobile_acc = (mobile_kill / mobile_acc) * 10)

## histogram
# Mean (blue) and median (green) markers. Colour and width are constants and
# therefore set outside aes(); inside aes() they would be mapped through
# scales, producing wrong colours and an unwanted legend.
# Titles and axis labels in this section previously carried "jumping" /
# "Redsignal" wording copied from the red-signal section; they now say mobile.
mobile_hist <- ggplot(mobile, aes(x = mobile_acc)) +
  geom_histogram(bins = 30, color = "red", fill = "orange", linetype = "dashed") +
  geom_vline(xintercept = mean(mobile$mobile_acc), color = "blue", lwd = 1) +
  geom_vline(xintercept = median(mobile$mobile_acc), color = "green", lwd = 1) +
  ggtitle("Histogram for mobile accidents") +
  xlab("Accidents") +
  ylab("Count") +
  theme_dark()
plot(mobile_hist)
summary(mobile$mobile_acc)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0 0.0 27.5 251.1 201.2 3828.0

## outlier detection using boxplot
mobile_boxplot <- ggplot(mobile, aes(x = Region, y = mobile_acc, color = Region)) +
  geom_boxplot(outlier.color = "red", outlier.shape = 2, outlier.size = 3) +
  stat_summary(fun = mean, geom = "point", size = 3, color = "steelblue") +
  theme_classic() +
  ggtitle("Boxplot for mobile accidents in every Region") +
  xlab("REGION") +
  ylab("Count")
plot(mobile_boxplot)
# Rows whose value matches one flagged as an outlier by base boxplot()
# (which also draws its own plot as a side effect).
subset(
  mobile,
  mobile$mobile_acc %in% boxplot(mobile$mobile_acc ~ mobile$Region)$out
)
## Region States_UTs mobile_acc mobile_kill mobile_gre_inj mobile_min_inj
## 3 northeast Assam 112 22 77 31
## 24 south Tamil Nadu 1477 252 631 930
## 34 UT Delhi 244 37 74 151
## mobile_tot_inj kill_per_mobile_acc
## 3 108 1.964286
## 24 1561 1.706161
## 34 225 1.516393

## plot
mobile_point <- ggplot(mobile, aes(x = mobile_acc, y = mobile_kill)) +
  geom_point(color = "red", size = 6) +
  geom_smooth(method = lm) +
  ggtitle("Scatter Plot for accidents") +
  xlab("ACCIDENTS") +
  ylab("KILL") +
  theme_classic()
plot(mobile_point)
## `geom_smooth()` using formula 'y ~ x'
cor(mobile$mobile_acc, mobile$mobile_kill, method = "pearson")
## [1] 0.9650849

## barplot
mobile_injuries <- plot_ly(mobile, x = ~States_UTs) %>%
  add_trace(y = ~mobile_gre_inj, name = "Greviously", type = "bar") %>%
  add_trace(y = ~mobile_min_inj, name = "Minor", type = "bar") %>%
  add_trace(y = ~mobile_tot_inj, name = "Total Injury", type = "bar")
mobile_injuries
mobile_injuries_region <- plot_ly(mobile, x = ~Region) %>%
  add_trace(y = ~mobile_gre_inj, name = "Greviously", type = "bar") %>%
  add_trace(y = ~mobile_min_inj, name = "Minor", type = "bar") %>%
  add_trace(y = ~mobile_tot_inj, name = "Total Injury", type = "bar")
mobile_injuries_region

# Ten highest kill rates (rows where kills equal accidents are excluded).
# reorder() ranks the bars by descending rate; sorting rows with arrange()
# has no effect on a discrete axis, which would otherwise be alphabetical.
top10_mobile_kill <- mobile %>%
  filter(mobile_acc != mobile_kill) %>%
  top_n(10, kill_per_mobile_acc) %>%
  ggplot(aes(x = reorder(States_UTs, -kill_per_mobile_acc), y = kill_per_mobile_acc)) +
  geom_bar(stat = "identity", color = "red", fill = "orange", linetype = "dashed") +
  ggtitle("TOP 10 STATES WITH HIGH KILLS DUE TO mobile") +
  xlab("STATES/UT") +
  ylab("Mobile KILLS") +
  theme_dark()
plot(top10_mobile_kill)

# Ten lowest kill rates among rows with positive accidents and kills and
# fewer kills than accidents; bars ranked by descending rate.
bottom10_mobile_kill <- mobile %>%
  filter(
    mobile_acc != mobile_kill & mobile_acc > 0 & mobile_kill > 0 &
      mobile_acc > mobile_kill
  ) %>%
  top_n(-10, kill_per_mobile_acc) %>%
  ggplot(aes(x = reorder(States_UTs, -kill_per_mobile_acc), y = kill_per_mobile_acc)) +
  geom_bar(stat = "identity", color = "red", fill = "orange", linetype = "dashed") +
  ggtitle("TOP 10 STATES WITH LOW KILLS DUE TO mobile") +
  xlab("STATES/UT") +
  ylab("Mobile KILLS") +
  theme_dark()
plot(bottom10_mobile_kill)
* Most of the injuries due to using mobiles while driving occurred in UTTAR PRADESH, and most of them are grievous injuries. * But the state of WEST BENGAL has the highest kill rate per 10 accidents caused by using a mobile while driving. * Meanwhile, MANIPUR & ANDHRA PRADESH have done well, with low kill rates.
# Regional average of the positive state-level mobile-use kill rates,
# divided by 10.
# NOTE(review): the division by 10 turns the per-10-accidents rate into a
# per-accident figure despite the column name - confirm the intended unit.
mobile_result <- mobile %>%
  select(Region, kill_per_mobile_acc) %>%
  filter(kill_per_mobile_acc > 0) %>%
  group_by(Region) %>%
  summarise(kill_per10_mobile_accidents = (sum(kill_per_mobile_acc) / n()) / 10)
data.frame(mobile_result)
## Region kill_per10_mobile_accidents
## 1 central 0.2830170
## 2 east 1.0784650
## 3 north 0.5520110
## 4 northeast 0.4428547
## 5 south 0.1681268
## 6 UT 0.1516393
## 7 west 0.3503197

# Region-wise bar chart of the averaged rate.
mobile_bar <- mobile_result %>%
  plot_ly(x = ~Region) %>%
  add_trace(
    y = ~kill_per10_mobile_accidents,
    name = " USING MOBILE ACCIDENTS (REGION_WISE)",
    type = "bar"
  )
mobile_bar

# The same figure shown as regional shares.
mobile_pie <- plot_ly(
  mobile_result,
  labels = ~Region,
  values = ~kill_per10_mobile_accidents,
  type = "pie",
  textposition = "inside",
  textinfo = "label+percent"
)
mobile_pie
### descriptive statistics
# One row per violation type with nationwide totals of accidents, deaths and
# injuries, summed over all rows of the violation table.
ACCIDENTS <- data.frame(
  traffic_violations = c(
    "Overspeeding", "Drunken Drive", "Wrongside Driving",
    "Jumping Redsignal", "Using Mobile"
  ),
  total_accidents = with(
    violation,
    c(sum(over_acc), sum(drunk_acc), sum(wrongside_acc), sum(redsig_acc), sum(mobile_acc))
  ),
  total_kills = with(
    violation,
    c(sum(over_kill), sum(drunk_kill), sum(wrongside_kill), sum(redsig_kill), sum(mobile_kill))
  ),
  total_injuries = with(
    violation,
    c(
      sum(over_tot_inj), sum(drunk_tot_inj), sum(wrongside_tot_inj),
      sum(redsig_tot_inj), sum(mobile_tot_inj)
    )
  )
)
ACCIDENTS
## traffic_violations total_accidents total_kills total_injuries
## 1 Overspeeding 310612 97588 316421
## 2 Drunken Drive 12018 4188 9944
## 3 Wrongside Driving 24781 8764 24100
## 4 Jumping Redsignal 4441 1545 4126
## 5 Using Mobile 9039 3707 7878

# Treat the violation label as a categorical variable.
ACCIDENTS$traffic_violations <- factor(ACCIDENTS$traffic_violations)
str(ACCIDENTS)
## 'data.frame': 5 obs. of 4 variables:
## $ traffic_violations: Factor w/ 5 levels "Drunken Drive",..: 3 1 5 2 4
## $ total_accidents : int 310612 12018 24781 4441 9039
## $ total_kills : int 97588 4188 8764 1545 3707
## $ total_injuries : int 316421 9944 24100 4126 7878
summary(ACCIDENTS)
## traffic_violations total_accidents total_kills total_injuries
## Drunken Drive :1 Min. : 4441 Min. : 1545 Min. : 4126
## Jumping Redsignal:1 1st Qu.: 9039 1st Qu.: 3707 1st Qu.: 7878
## Overspeeding :1 Median : 12018 Median : 4188 Median : 9944
## Using Mobile :1 Mean : 72178 Mean :23158 Mean : 72494
## Wrongside Driving:1 3rd Qu.: 24781 3rd Qu.: 8764 3rd Qu.: 24100
## Max. :310612 Max. :97588 Max. :316421

# Pearson correlation between total accidents and total deaths.
with(ACCIDENTS, cor(total_accidents, total_kills, method = "pearson"))
## [1] 0.9999653

# Accidents, deaths and injuries side by side for each violation type.
acc_bar <- ACCIDENTS %>%
  plot_ly(x = ~traffic_violations) %>%
  add_trace(y = ~total_accidents, name = "ACCIDENTS", type = "bar") %>%
  add_trace(y = ~total_kills, name = "DEATHS", type = "bar") %>%
  add_trace(y = ~total_injuries, name = "INJURY", type = "bar")
acc_bar

# Share of each violation type in accidents, deaths and injuries.
acc_pie <- plot_ly(
  ACCIDENTS,
  labels = ~traffic_violations,
  values = ~total_accidents,
  type = "pie",
  textposition = "inside",
  textinfo = "label+percent"
)
acc_pie
acc_kill_pie <- plot_ly(
  ACCIDENTS,
  labels = ~traffic_violations,
  values = ~total_kills,
  type = "pie",
  textposition = "inside",
  textinfo = "label+percent"
)
acc_kill_pie
acc_inj_pie <- plot_ly(
  ACCIDENTS,
  labels = ~traffic_violations,
  values = ~total_injuries,
  type = "pie",
  textposition = "inside",
  textinfo = "label+percent"
)
acc_inj_pie
traffic=read.csv("C:\\Users\\DELL\\Downloads\\traffic.csv",header = TRUE)
traffic=within(traffic,{
Region=factor(Region)})
#### Traffic signal controlled
# Keep columns 2-8 of `traffic` (Region, States_Uts and the five tra_sig_*
# columns) and add the number of deaths per 10 accidents.
trafficSignal <- traffic %>%
  select(2:8) %>%
  mutate(kill_per_trafficsignal_acc = (tra_sig_kill / tra_sig_acc) * 10)

## histogram
# Blue line = mean, green line = median of the accident counts.
# The colour and line width are constants, so they are set outside aes().
# Inside aes() the strings "blue"/"green" were treated as data values, which
# drew the lines in default palette colours and added a spurious legend.
trafficSignal_hist <- ggplot(trafficSignal, aes(x = tra_sig_acc)) +
  geom_histogram(
    bins = 30, color = "red", fill = "orange", linetype = "dashed"
  ) +
  geom_vline(
    xintercept = mean(trafficSignal$tra_sig_acc), color = "blue", lwd = 1
  ) +
  geom_vline(
    xintercept = median(trafficSignal$tra_sig_acc), color = "green", lwd = 1
  ) +
  ggtitle("Histogram for traffic signal controlled accidents") +
  xlab("Accidents") +
  ylab("Count") +
  theme_dark()
plot(trafficSignal_hist)
## outlier detection using boxplot
# Per-region boxplot of accident counts; the steelblue point marks each
# region's mean and outliers are drawn as red triangles.
trafficSignal_boxplot <- ggplot(
  trafficSignal,
  aes(x = Region, y = tra_sig_acc, color = Region)
) +
  geom_boxplot(outlier.color = "red", outlier.shape = 2, outlier.size = 3) +
  stat_summary(fun = mean, geom = "point", size = 3, color = "steelblue") +
  theme_classic() +
  labs(
    title = "Boxplot for traffic signal controlled accidents in every Region",
    x = "REGION",
    y = "Count"
  )
plot(trafficSignal_boxplot)

# List the rows whose accident count equals one of the per-region outlier
# values reported by base boxplot() (this call also draws a base boxplot).
# NOTE(review): matching is by value only, so an equal count in a different
# region would be listed as well.
trafficSignal[
  trafficSignal$tra_sig_acc %in%
    boxplot(trafficSignal$tra_sig_acc ~ trafficSignal$Region)$out,
]
## Region States_Uts tra_sig_acc tra_sig_kill tra_sig_gre_inj
## 3 northeast Assam 173 34 106
## 24 south Tamil Nadu 2965 561 462
## 34 UT Delhi 699 143 129
## tra_sig_min_inj tra_sig_tot_inj kill_per_trafficsignal_acc
## 3 35 141 1.965318
## 24 2853 3315 1.892074
## 34 458 587 2.045780

## plot
# Accidents against deaths with a fitted linear trend.
trafficSignal_point <- ggplot(
  trafficSignal,
  aes(x = tra_sig_acc, y = tra_sig_kill)
) +
  geom_point(color = "red", size = 6) +
  geom_smooth(method = lm) +
  labs(
    title = "Scatter Plot for traffic controlled accidents",
    x = "ACCIDENTS",
    y = "KILL"
  ) +
  theme_classic()
plot(trafficSignal_point)
## `geom_smooth()` using formula 'y ~ x'

## barplot
# Grievous, minor and total injuries, one bar per row, positioned by state/UT.
trafficSignal_injuries <- trafficSignal %>%
  plot_ly(x = ~States_Uts) %>%
  add_trace(y = ~tra_sig_gre_inj, name = "Greviously", type = "bar") %>%
  add_trace(y = ~tra_sig_min_inj, name = "Minor", type = "bar") %>%
  add_trace(y = ~tra_sig_tot_inj, name = "Total Injury", type = "bar")
trafficSignal_injuries

# Same traces positioned by Region; rows are not aggregated per region here.
trafficSignal_injuries_region <- trafficSignal %>%
  plot_ly(x = ~Region) %>%
  add_trace(y = ~tra_sig_gre_inj, name = "Greviously", type = "bar") %>%
  add_trace(y = ~tra_sig_min_inj, name = "Minor", type = "bar") %>%
  add_trace(y = ~tra_sig_tot_inj, name = "Total Injury", type = "bar")
trafficSignal_injuries_region
# Ten states/UTs with the highest deaths-per-10-accidents ratio. Rows where
# the accident count equals the death count are dropped first.
# Bars are sorted with reorder(): arrange() before ggplot() does not change
# the order of a discrete axis, so the bars used to appear alphabetically.
top10_trafficSignal_kill <- trafficSignal %>%
  filter(tra_sig_acc != tra_sig_kill) %>%
  top_n(10, kill_per_trafficsignal_acc) %>%
  ggplot(aes(
    x = reorder(States_Uts, -kill_per_trafficsignal_acc),
    y = kill_per_trafficsignal_acc
  )) +
  geom_bar(
    stat = "identity", color = "red", fill = "orange", linetype = "dashed"
  ) +
  ggtitle("TOP 10 STATES WITH HIGH KILLS IN Traffic signal controlled places") +
  xlab("STATES/UT") +
  ylab("traffic signal control KILLS") +
  theme_dark()
plot(top10_trafficSignal_kill)

# Ten states/UTs with the lowest ratio, restricted to rows with at least one
# accident and one death and more accidents than deaths. Bars are sorted from
# highest to lowest ratio via reorder().
bottom10_trafficSignal_kill <- trafficSignal %>%
  filter(
    tra_sig_acc != tra_sig_kill,
    tra_sig_acc > 0,
    tra_sig_kill > 0,
    tra_sig_acc > tra_sig_kill
  ) %>%
  top_n(-10, kill_per_trafficsignal_acc) %>%
  ggplot(aes(
    x = reorder(States_Uts, -kill_per_trafficsignal_acc),
    y = kill_per_trafficsignal_acc
  )) +
  geom_bar(
    stat = "identity", color = "red", fill = "orange", linetype = "dashed"
  ) +
  ggtitle("TOP 10 STATES WITH LOW KILLS in traffic signal controlled places") +
  xlab("STATES/UT") +
  ylab("traffic signal control KILLS") +
  theme_dark()
plot(bottom10_trafficSignal_kill)
###########################################################################
# Keep columns 2-3 and 9-13 of `traffic` (Region, States_Uts and the five
# pol_* columns) and add the number of deaths per 10 accidents.
policeControlled <- traffic %>%
  select(c(2:3, 9:13)) %>%
  mutate(kill_per_policeControlled_acc = (pol_kill / pol_acc) * 10)

## histogram
# Blue line = mean, green line = median of the accident counts.
# The colour and line width are constants, so they are set outside aes().
# Inside aes() the strings "blue"/"green" were treated as data values, which
# drew the lines in default palette colours and added a spurious legend.
policeControlled_hist <- ggplot(policeControlled, aes(x = pol_acc)) +
  geom_histogram(
    bins = 30, color = "red", fill = "orange", linetype = "dashed"
  ) +
  geom_vline(
    xintercept = mean(policeControlled$pol_acc), color = "blue", lwd = 1
  ) +
  geom_vline(
    xintercept = median(policeControlled$pol_acc), color = "green", lwd = 1
  ) +
  ggtitle("Histogram for policeControlled accidents") +
  xlab("Accidents") +
  ylab("Count") +
  theme_dark()
plot(policeControlled_hist)
## outlier detection using boxplot
# Per-region boxplot of accident counts; the steelblue point marks each
# region's mean and outliers are drawn as red triangles.
policeControlled_boxplot <- ggplot(
  policeControlled,
  aes(x = Region, y = pol_acc, color = Region)
) +
  geom_boxplot(outlier.color = "red", outlier.shape = 2, outlier.size = 3) +
  stat_summary(fun = mean, geom = "point", size = 3, color = "steelblue") +
  theme_classic() +
  ggtitle("Boxplot for policeControlled accidents in every Region") +
  xlab("REGION") +
  ylab("Count")
plot(policeControlled_boxplot)

# List the rows whose accident count equals one of the per-region outlier
# values reported by base boxplot() (this call also draws a base boxplot).
# NOTE(review): matching is by value only, so an equal count in a different
# region would be listed as well.
subset(
  policeControlled,
  policeControlled$pol_acc %in%
    boxplot(policeControlled$pol_acc ~ policeControlled$Region)$out
)
## Region States_Uts pol_acc pol_kill pol_gre_inj pol_min_inj pol_tot_inj
## 3 northeast Assam 183 37 133 47 180
## kill_per_policeControlled_acc
## 3 2.021858

## plot
# Accidents against deaths with a fitted linear trend.
policeControlled_point <- ggplot(
  policeControlled,
  aes(x = pol_acc, y = pol_kill)
) +
  geom_point(color = "red", size = 6) +
  geom_smooth(method = lm) +
  ggtitle("Scatter Plot for accidents") +
  xlab("ACCIDENTS") +
  ylab("KILL") +
  theme_classic()
# Display the scatter plot built just above; the previous code plotted
# `overspeed_point`, an object this section never creates.
plot(policeControlled_point)
## `geom_smooth()` using formula 'y ~ x'

## barplot
# Grievous, minor and total injuries, one bar per row, positioned by state/UT.
policeControlled_injuries <- plot_ly(policeControlled, x = ~States_Uts) %>%
  add_trace(y = ~pol_gre_inj, name = "Greviously", type = "bar") %>%
  add_trace(y = ~pol_min_inj, name = "Minor", type = "bar") %>%
  add_trace(y = ~pol_tot_inj, name = "Total Injury", type = "bar")
policeControlled_injuries

# Same traces positioned by Region; rows are not aggregated per region here.
policeControlled_injuries_region <- plot_ly(policeControlled, x = ~Region) %>%
  add_trace(y = ~pol_gre_inj, name = "Greviously", type = "bar") %>%
  add_trace(y = ~pol_min_inj, name = "Minor", type = "bar") %>%
  add_trace(y = ~pol_tot_inj, name = "Total Injury", type = "bar")
policeControlled_injuries_region
# Ten states/UTs with the highest deaths-per-10-accidents ratio. Rows where
# the accident count equals the death count are dropped first.
# Bars are sorted with reorder(): arrange() before ggplot() does not change
# the order of a discrete axis, so the bars used to appear alphabetically.
top10_policeControlled_kill <- policeControlled %>%
  filter(pol_acc != pol_kill) %>%
  top_n(10, kill_per_policeControlled_acc) %>%
  ggplot(aes(
    x = reorder(States_Uts, -kill_per_policeControlled_acc),
    y = kill_per_policeControlled_acc
  )) +
  geom_bar(
    stat = "identity", color = "red", fill = "orange", linetype = "dashed"
  ) +
  ggtitle("TOP 10 STATES WITH HIGH KILLS DUE TO policeControlled") +
  xlab("STATES/Ut") +
  ylab("policeControlled KILLS") +
  theme_dark()
# Display the chart built just above; the previous code plotted
# `top10_overspeed_kill`, an object this section never creates.
plot(top10_policeControlled_kill)

# Ten states/UTs with the lowest ratio, restricted to rows with at least one
# accident and one death and more accidents than deaths. Bars are sorted from
# highest to lowest ratio via reorder().
bottom10_policeControlled_kill <- policeControlled %>%
  filter(
    pol_acc != pol_kill,
    pol_acc > 0,
    pol_kill > 0,
    pol_acc > pol_kill
  ) %>%
  top_n(-10, kill_per_policeControlled_acc) %>%
  ggplot(aes(
    x = reorder(States_Uts, -kill_per_policeControlled_acc),
    y = kill_per_policeControlled_acc
  )) +
  geom_bar(
    stat = "identity", color = "red", fill = "orange", linetype = "dashed"
  ) +
  ggtitle("TOP 10 STATES WITH LOW KILLS DUE TO policeControlled") +
  xlab("STATES/Ut") +
  ylab("policeControlled KILLS") +
  theme_dark()
plot(bottom10_policeControlled_kill)
# Keep columns 2-3 and 14-18 of `traffic` (Region, States_Uts and the five
# stop_* columns) and add the number of deaths per 10 accidents.
stopsignal <- traffic %>%
  select(c(2:3, 14:18)) %>%
  mutate(kill_per_stopsignal_acc = (stop_kill / stop_acc) * 10)

## histogram
# Blue line = mean, green line = median of the accident counts.
# The colour and line width are constants, so they are set outside aes().
# Inside aes() the strings "blue"/"green" were treated as data values, which
# drew the lines in default palette colours and added a spurious legend.
stopsignal_hist <- ggplot(stopsignal, aes(x = stop_acc)) +
  geom_histogram(
    bins = 30, color = "red", fill = "orange", linetype = "dashed"
  ) +
  geom_vline(
    xintercept = mean(stopsignal$stop_acc), color = "blue", lwd = 1
  ) +
  geom_vline(
    xintercept = median(stopsignal$stop_acc), color = "green", lwd = 1
  ) +
  ggtitle("Histogram for stopsignal accidents") +
  xlab("Accidents") +
  ylab("Count") +
  theme_dark()
plot(stopsignal_hist)
## outlier detection using boxplot
# Per-region boxplot of accident counts; the steelblue point marks each
# region's mean and outliers are drawn as red triangles.
stopsignal_boxplot <- ggplot(
  stopsignal,
  aes(x = Region, y = stop_acc, color = Region)
) +
  geom_boxplot(outlier.color = "red", outlier.shape = 2, outlier.size = 3) +
  stat_summary(fun = mean, geom = "point", size = 3, color = "steelblue") +
  theme_classic() +
  labs(
    title = "Boxplot for stopsignal accidents in every Region",
    x = "REGION",
    y = "Count"
  )
plot(stopsignal_boxplot)

# List the rows whose accident count equals one of the per-region outlier
# values reported by base boxplot() (this call also draws a base boxplot).
# NOTE(review): matching is by value only, so an equal count in a different
# region would be listed as well.
stopsignal[
  stopsignal$stop_acc %in%
    boxplot(stopsignal$stop_acc ~ stopsignal$Region)$out,
]
## Region States_Uts stop_acc stop_kill stop_gre_inj stop_min_inj
## 3 northeast Assam 140 29 75 19
## 24 south Tamil Nadu 842 202 238 651
## 34 UT Delhi 217 55 36 155
## stop_tot_inj kill_per_stopsignal_acc
## 3 94 2.071429
## 24 889 2.399050
## 34 191 2.534562

## plot
# Accidents against deaths with a fitted linear trend.
stopsignal_point <- ggplot(stopsignal, aes(x = stop_acc, y = stop_kill)) +
  geom_point(color = "red", size = 6) +
  geom_smooth(method = lm) +
  labs(title = "Scatter Plot for accidents", x = "ACCIDENTS", y = "KILL") +
  theme_classic()
plot(stopsignal_point)
## `geom_smooth()` using formula 'y ~ x'

## barplot
# Grievous, minor and total injuries, one bar per row, positioned by state/UT.
stopsignal_injuries <- stopsignal %>%
  plot_ly(x = ~States_Uts) %>%
  add_trace(y = ~stop_gre_inj, name = "Greviously", type = "bar") %>%
  add_trace(y = ~stop_min_inj, name = "Minor", type = "bar") %>%
  add_trace(y = ~stop_tot_inj, name = "Total Injury", type = "bar")
stopsignal_injuries

# Same traces positioned by Region; rows are not aggregated per region here.
stopsignal_injuries_region <- stopsignal %>%
  plot_ly(x = ~Region) %>%
  add_trace(y = ~stop_gre_inj, name = "Greviously", type = "bar") %>%
  add_trace(y = ~stop_min_inj, name = "Minor", type = "bar") %>%
  add_trace(y = ~stop_tot_inj, name = "Total Injury", type = "bar")
stopsignal_injuries_region
# Ten states/UTs with the highest deaths-per-10-accidents ratio. Rows where
# the accident count equals the death count are dropped first.
# Bars are sorted with reorder(): arrange() before ggplot() does not change
# the order of a discrete axis, so the bars used to appear alphabetically.
top10_stopsignal_kill <- stopsignal %>%
  filter(stop_acc != stop_kill) %>%
  top_n(10, kill_per_stopsignal_acc) %>%
  ggplot(aes(
    x = reorder(States_Uts, -kill_per_stopsignal_acc),
    y = kill_per_stopsignal_acc
  )) +
  geom_bar(
    stat = "identity", color = "red", fill = "orange", linetype = "dashed"
  ) +
  ggtitle("TOP 10 STATES WITH HIGH KILLS DUE TO stopsignal") +
  xlab("STATES/UT") +
  ylab("stopsignal KILLS") +
  theme_dark()
plot(top10_stopsignal_kill)

# Ten states/UTs with the lowest ratio, restricted to rows with at least one
# accident and one death and more accidents than deaths. Bars are sorted from
# highest to lowest ratio via reorder().
bottom10_stopsignal_kill <- stopsignal %>%
  filter(
    stop_acc != stop_kill,
    stop_acc > 0,
    stop_kill > 0,
    stop_acc > stop_kill
  ) %>%
  top_n(-10, kill_per_stopsignal_acc) %>%
  ggplot(aes(
    x = reorder(States_Uts, -kill_per_stopsignal_acc),
    y = kill_per_stopsignal_acc
  )) +
  geom_bar(
    stat = "identity", color = "red", fill = "orange", linetype = "dashed"
  ) +
  ggtitle("TOP 10 STATES WITH LOW KILLS DUE TO stopsignal") +
  xlab("STATES/UT") +
  ylab("stopsignal KILLS") +
  theme_dark()
plot(bottom10_stopsignal_kill)
#####################################################################################################
# Keep columns 2-3 and 19-23 of `traffic` (Region, States_Uts and the five
# blinker_* columns) and add the number of deaths per 10 accidents.
blinker <- traffic %>%
  select(c(2:3, 19:23)) %>%
  mutate(kill_per_blinker_acc = (blinker_kill / blinker_acc) * 10)

## histogram
# Blue line = mean, green line = median of the accident counts.
# The colour and line width are constants, so they are set outside aes().
# Inside aes() the strings "blue"/"green" were treated as data values, which
# drew the lines in default palette colours and added a spurious legend.
blinker_hist <- ggplot(blinker, aes(x = blinker_acc)) +
  geom_histogram(
    bins = 30, color = "red", fill = "orange", linetype = "dashed"
  ) +
  geom_vline(
    xintercept = mean(blinker$blinker_acc), color = "blue", lwd = 1
  ) +
  geom_vline(
    xintercept = median(blinker$blinker_acc), color = "green", lwd = 1
  ) +
  ggtitle("Histogram for blinker accidents") +
  xlab("Accidents") +
  ylab("Count") +
  theme_dark()
plot(blinker_hist)
## outlier detection using boxplot
# Per-region boxplot of accident counts; the steelblue point marks each
# region's mean and outliers are drawn as red triangles.
blinker_boxplot <- ggplot(
  blinker,
  aes(x = Region, y = blinker_acc, color = Region)
) +
  geom_boxplot(outlier.color = "red", outlier.shape = 2, outlier.size = 3) +
  stat_summary(fun = mean, geom = "point", size = 3, color = "steelblue") +
  theme_classic() +
  labs(
    title = "Boxplot for blinker accidents in every Region",
    x = "REGION",
    y = "Count"
  )
plot(blinker_boxplot)

# List the rows whose accident count equals one of the per-region outlier
# values reported by base boxplot() (this call also draws a base boxplot).
# NOTE(review): matching is by value only, so an equal count in a different
# region would be listed as well.
blinker[
  blinker$blinker_acc %in%
    boxplot(blinker$blinker_acc ~ blinker$Region)$out,
]
## Region States_Uts blinker_acc blinker_kill blinker_gre_inj
## 3 northeast Assam 173 30 96
## 24 south Tamil Nadu 1499 264 287
## 34 UT Delhi 320 157 37
## blinker_min_inj blinker_tot_inj kill_per_blinker_acc
## 3 24 120 1.734104
## 24 1477 1764 1.761174
## 34 221 258 4.906250

## plot
# Accidents against deaths with a fitted linear trend.
blinker_point <- ggplot(blinker, aes(x = blinker_acc, y = blinker_kill)) +
  geom_point(color = "red", size = 6) +
  geom_smooth(method = lm) +
  labs(title = "Scatter Plot for accidents", x = "ACCIDENTS", y = "KILL") +
  theme_classic()
plot(blinker_point)
## `geom_smooth()` using formula 'y ~ x'

## barplot
# Grievous, minor and total injuries, one bar per row, positioned by state/UT.
blinker_injuries <- blinker %>%
  plot_ly(x = ~States_Uts) %>%
  add_trace(y = ~blinker_gre_inj, name = "Greviously", type = "bar") %>%
  add_trace(y = ~blinker_min_inj, name = "Minor", type = "bar") %>%
  add_trace(y = ~blinker_tot_inj, name = "Total Injury", type = "bar")
blinker_injuries

# Same traces positioned by Region; rows are not aggregated per region here.
blinker_injuries_region <- blinker %>%
  plot_ly(x = ~Region) %>%
  add_trace(y = ~blinker_gre_inj, name = "Greviously", type = "bar") %>%
  add_trace(y = ~blinker_min_inj, name = "Minor", type = "bar") %>%
  add_trace(y = ~blinker_tot_inj, name = "Total Injury", type = "bar")
blinker_injuries_region
# Ten states/UTs with the highest deaths-per-10-accidents ratio. Rows where
# the accident count equals the death count are dropped first.
# Bars are sorted with reorder(): arrange() before ggplot() does not change
# the order of a discrete axis, so the bars used to appear alphabetically.
# The title no longer carries the stray word "Jumping", so it matches the
# bottom-10 title below.
top10_blinker_kill <- blinker %>%
  filter(blinker_acc != blinker_kill) %>%
  top_n(10, kill_per_blinker_acc) %>%
  ggplot(aes(
    x = reorder(States_Uts, -kill_per_blinker_acc),
    y = kill_per_blinker_acc
  )) +
  geom_bar(
    stat = "identity", color = "red", fill = "orange", linetype = "dashed"
  ) +
  ggtitle("TOP 10 STATES WITH HIGH KILLS DUE TO blinker") +
  xlab("STATES/UT") +
  ylab("blinker KILLS") +
  theme_dark()
plot(top10_blinker_kill)

# Ten states/UTs with the lowest ratio, restricted to rows with at least one
# accident and one death and more accidents than deaths. Bars are sorted from
# highest to lowest ratio via reorder().
bottom10_blinker_kill <- blinker %>%
  filter(
    blinker_acc != blinker_kill,
    blinker_acc > 0,
    blinker_kill > 0,
    blinker_acc > blinker_kill
  ) %>%
  top_n(-10, kill_per_blinker_acc) %>%
  ggplot(aes(
    x = reorder(States_Uts, -kill_per_blinker_acc),
    y = kill_per_blinker_acc
  )) +
  geom_bar(
    stat = "identity", color = "red", fill = "orange", linetype = "dashed"
  ) +
  ggtitle("TOP 10 STATES WITH LOW KILLS DUE TO blinker") +
  xlab("STATES/UT") +
  ylab("blinker KILLS") +
  theme_dark()
plot(bottom10_blinker_kill)
# Keep columns 2-3 and 24-28 of `traffic` (Region, States_Uts and the five
# uncont_* columns) and add the number of deaths per 10 accidents.
uncontrolled <- traffic %>%
  select(c(2:3, 24:28)) %>%
  mutate(kill_per_uncontrolled_acc = (uncont_kill / uncont_acc) * 10)

## histogram
# Blue line = mean, green line = median of the accident counts.
# The colour and line width are constants, so they are set outside aes().
# Inside aes() the strings "blue"/"green" were treated as data values, which
# drew the lines in default palette colours and added a spurious legend.
# The title no longer carries the stray word "jumping".
uncontrolled_hist <- ggplot(uncontrolled, aes(x = uncont_acc)) +
  geom_histogram(
    bins = 30, color = "red", fill = "orange", linetype = "dashed"
  ) +
  geom_vline(
    xintercept = mean(uncontrolled$uncont_acc), color = "blue", lwd = 1
  ) +
  geom_vline(
    xintercept = median(uncontrolled$uncont_acc), color = "green", lwd = 1
  ) +
  ggtitle("Histogram for uncontrolled accidents") +
  xlab("Accidents") +
  ylab("Count") +
  theme_dark()
plot(uncontrolled_hist)
## outlier detection using boxplot
# Per-region boxplot of accident counts; the steelblue point marks each
# region's mean and outliers are drawn as red triangles.
# The title no longer carries the stray word "jumping".
uncontrolled_boxplot <- ggplot(
  uncontrolled,
  aes(x = Region, y = uncont_acc, color = Region)
) +
  geom_boxplot(outlier.color = "red", outlier.shape = 2, outlier.size = 3) +
  stat_summary(fun = mean, geom = "point", size = 3, color = "steelblue") +
  theme_classic() +
  ggtitle("Boxplot for uncontrolled accidents in every Region") +
  xlab("REGION") +
  ylab("Count")
plot(uncontrolled_boxplot)

# List the rows whose accident count equals one of the per-region outlier
# values reported by base boxplot() (this call also draws a base boxplot).
# NOTE(review): matching is by value only, so an equal count in a different
# region would be listed as well.
subset(
  uncontrolled,
  uncontrolled$uncont_acc %in%
    boxplot(uncontrolled$uncont_acc ~ uncontrolled$Region)$out
)
## Region States_Uts uncont_acc uncont_kill uncont_gre_inj uncont_min_inj
## 3 northeast Assam 2074 646 1303 232
## 22 north Rajasthan 4576 2052 1531 2763
## 34 UT Delhi 1943 516 352 1414
## uncont_tot_inj kill_per_uncontrolled_acc
## 3 1535 3.114754
## 22 4294 4.484266
## 34 1766 2.655687

## plot
# Accidents against deaths with a fitted linear trend.
uncontrolled_point <- ggplot(
  uncontrolled,
  aes(x = uncont_acc, y = uncont_kill)
) +
  geom_point(color = "red", size = 6) +
  geom_smooth(method = lm) +
  ggtitle("Scatter Plot for accidents") +
  xlab("ACCIDENTS") +
  ylab("KILL") +
  theme_classic()
plot(uncontrolled_point)
## `geom_smooth()` using formula 'y ~ x'

## barplot
# Grievous, minor and total injuries, one bar per row, positioned by state/UT.
uncontrolled_injuries <- plot_ly(uncontrolled, x = ~States_Uts) %>%
  add_trace(y = ~uncont_gre_inj, name = "Greviously", type = "bar") %>%
  add_trace(y = ~uncont_min_inj, name = "Minor", type = "bar") %>%
  add_trace(y = ~uncont_tot_inj, name = "Total Injury", type = "bar")
uncontrolled_injuries

# Same traces positioned by Region; rows are not aggregated per region here.
uncontrolled_injuries_region <- plot_ly(uncontrolled, x = ~Region) %>%
  add_trace(y = ~uncont_gre_inj, name = "Greviously", type = "bar") %>%
  add_trace(y = ~uncont_min_inj, name = "Minor", type = "bar") %>%
  add_trace(y = ~uncont_tot_inj, name = "Total Injury", type = "bar")
uncontrolled_injuries_region
# Ten states/UTs with the highest deaths-per-10-accidents ratio. Rows where
# the accident count equals the death count are dropped first.
# Bars are sorted with reorder(): arrange() before ggplot() does not change
# the order of a discrete axis, so the bars used to appear alphabetically.
# The title ("Jumping") and y label ("Redsignal KILLS") were copied from
# another section and now name the uncontrolled category.
top10_uncontrolled_kill <- uncontrolled %>%
  filter(uncont_acc != uncont_kill) %>%
  top_n(10, kill_per_uncontrolled_acc) %>%
  ggplot(aes(
    x = reorder(States_Uts, -kill_per_uncontrolled_acc),
    y = kill_per_uncontrolled_acc
  )) +
  geom_bar(
    stat = "identity", color = "red", fill = "orange", linetype = "dashed"
  ) +
  ggtitle("TOP 10 STATES WITH HIGH KILLS DUE TO uncontrolled") +
  xlab("STATES/UT") +
  ylab("uncontrolled KILLS") +
  theme_dark()
plot(top10_uncontrolled_kill)

# Ten states/UTs with the lowest ratio, restricted to rows with at least one
# accident and one death and more accidents than deaths. Bars are sorted from
# highest to lowest ratio via reorder().
bottom10_uncontrolled_kill <- uncontrolled %>%
  filter(
    uncont_acc != uncont_kill,
    uncont_acc > 0,
    uncont_kill > 0,
    uncont_acc > uncont_kill
  ) %>%
  top_n(-10, kill_per_uncontrolled_acc) %>%
  ggplot(aes(
    x = reorder(States_Uts, -kill_per_uncontrolled_acc),
    y = kill_per_uncontrolled_acc
  )) +
  geom_bar(
    stat = "identity", color = "red", fill = "orange", linetype = "dashed"
  ) +
  ggtitle("TOP 10 STATES WITH LOW KILLS DUE TO uncontrolled") +
  xlab("STATES/UT") +
  ylab("uncontrolled KILLS") +
  theme_dark()
plot(bottom10_uncontrolled_kill)
######################################################################################################
# Keep columns 2-3 and 29-33 of `traffic` (Region, States_Uts and the five
# other_* columns) and add the number of deaths per 10 accidents.
other <- traffic %>%
  select(c(2:3, 29:33)) %>%
  mutate(kill_per_other_acc = (other_kill / other_acc) * 10)

## histogram
# Blue line = mean, green line = median of the accident counts.
# The colour and line width are constants, so they are set outside aes().
# Inside aes() the strings "blue"/"green" were treated as data values, which
# drew the lines in default palette colours and added a spurious legend.
# The title no longer carries the stray word "jumping".
other_hist <- ggplot(other, aes(x = other_acc)) +
  geom_histogram(
    bins = 30, color = "red", fill = "orange", linetype = "dashed"
  ) +
  geom_vline(
    xintercept = mean(other$other_acc), color = "blue", lwd = 1
  ) +
  geom_vline(
    xintercept = median(other$other_acc), color = "green", lwd = 1
  ) +
  ggtitle("Histogram for other accidents") +
  xlab("Accidents") +
  ylab("Count") +
  theme_dark()
plot(other_hist)
## outlier detection using boxplot
# Per-region boxplot of accident counts; the steelblue point marks each
# region's mean and outliers are drawn as red triangles.
# The title no longer carries the stray word "jumping".
other_boxplot <- ggplot(
  other,
  aes(x = Region, y = other_acc, color = Region)
) +
  geom_boxplot(outlier.color = "red", outlier.shape = 2, outlier.size = 3) +
  stat_summary(fun = mean, geom = "point", size = 3, color = "steelblue") +
  theme_classic() +
  ggtitle("Boxplot for other accidents in every Region") +
  xlab("REGION") +
  ylab("Count")
plot(other_boxplot)

# List the rows whose accident count equals one of the per-region outlier
# values reported by base boxplot() (this call also draws a base boxplot).
# NOTE(review): matching is by value only, so an equal count in a different
# region would be listed as well.
subset(
  other,
  other$other_acc %in% boxplot(other$other_acc ~ other$Region)$out
)
## Region States_Uts other_acc other_kill other_gre_inj other_min_inj
## 3 northeast Assam 5505 2190 4251 1054
## 34 UT Delhi 3142 783 510 2594
## other_tot_inj kill_per_other_acc
## 3 5305 3.978202
## 34 3104 2.492043

## plot
# Accidents against deaths with a fitted linear trend.
other_point <- ggplot(other, aes(x = other_acc, y = other_kill)) +
  geom_point(color = "red", size = 6) +
  geom_smooth(method = lm) +
  ggtitle("Scatter Plot for accidents") +
  xlab("ACCIDENTS") +
  ylab("KILL") +
  theme_classic()
plot(other_point)
## `geom_smooth()` using formula 'y ~ x'

## barplot
# Grievous, minor and total injuries, one bar per row, positioned by state/UT.
other_injuries <- plot_ly(other, x = ~States_Uts) %>%
  add_trace(y = ~other_gre_inj, name = "Greviously", type = "bar") %>%
  add_trace(y = ~other_min_inj, name = "Minor", type = "bar") %>%
  add_trace(y = ~other_tot_inj, name = "Total Injury", type = "bar")
other_injuries

# Same traces positioned by Region; rows are not aggregated per region here.
other_injuries_region <- plot_ly(other, x = ~Region) %>%
  add_trace(y = ~other_gre_inj, name = "Greviously", type = "bar") %>%
  add_trace(y = ~other_min_inj, name = "Minor", type = "bar") %>%
  add_trace(y = ~other_tot_inj, name = "Total Injury", type = "bar")
other_injuries_region
# Ten states/UTs with the highest deaths-per-10-accidents ratio. Rows where
# the accident count equals the death count are dropped first.
# Bars are sorted with reorder(): arrange() before ggplot() does not change
# the order of a discrete axis, so the bars used to appear alphabetically.
# The title ("Jumping") and y label ("Redsignal KILLS") were copied from
# another section and now name the "other" category.
top10_other_kill <- other %>%
  filter(other_acc != other_kill) %>%
  top_n(10, kill_per_other_acc) %>%
  ggplot(aes(
    x = reorder(States_Uts, -kill_per_other_acc),
    y = kill_per_other_acc
  )) +
  geom_bar(
    stat = "identity", color = "red", fill = "orange", linetype = "dashed"
  ) +
  ggtitle("TOP 10 STATES WITH HIGH KILLS DUE TO other") +
  xlab("STATES/UT") +
  ylab("other KILLS") +
  theme_dark()
plot(top10_other_kill)

# Ten states/UTs with the lowest ratio, restricted to rows with at least one
# accident and one death and more accidents than deaths. Bars are sorted from
# highest to lowest ratio via reorder().
bottom10_other_kill <- other %>%
  filter(
    other_acc != other_kill,
    other_acc > 0,
    other_kill > 0,
    other_acc > other_kill
  ) %>%
  top_n(-10, kill_per_other_acc) %>%
  ggplot(aes(
    x = reorder(States_Uts, -kill_per_other_acc),
    y = kill_per_other_acc
  )) +
  geom_bar(
    stat = "identity", color = "red", fill = "orange", linetype = "dashed"
  ) +
  ggtitle("TOP 10 STATES WITH LOW KILLS DUE TO other") +
  xlab("STATES/UT") +
  ylab("other KILLS") +
  theme_dark()
plot(bottom10_other_kill)
#########################################################################################
# One row per type of traffic control. Each total is the sum of the matching
# column of `traffic` over all of its rows.
TRAFFIC_CONTROLS <- with(traffic, data.frame(
  traffic_controls = c(
    "TrafficSignal", "Police Controlled", "Stop Signal",
    "Flash/Blinker", "Uncontrolled Places", "Others"
  ),
  total_accidents = c(
    sum(tra_sig_acc), sum(pol_acc), sum(stop_acc),
    sum(blinker_acc), sum(uncont_acc), sum(other_acc)
  ),
  total_kills = c(
    sum(tra_sig_kill), sum(pol_kill), sum(stop_kill),
    sum(blinker_kill), sum(uncont_kill), sum(other_kill)
  ),
  total_injuries = c(
    sum(tra_sig_tot_inj), sum(pol_tot_inj), sum(stop_tot_inj),
    sum(blinker_tot_inj), sum(uncont_tot_inj), sum(other_tot_inj)
  )
))
# Store the control label as a factor.
TRAFFIC_CONTROLS$traffic_controls <- factor(TRAFFIC_CONTROLS$traffic_controls)
str(TRAFFIC_CONTROLS)
## 'data.frame': 6 obs. of 4 variables:
## $ traffic_controls: Factor w/ 6 levels "Flash/Blinker",..: 5 3 4 1 6 2
## $ total_accidents : int 13726 12793 6513 7904 114133 311975
## $ total_kills : int 3325 4090 2491 2757 33149 105605
## $ total_injuries : int 12468 11519 5665 7378 109344 323044
summary(TRAFFIC_CONTROLS)
## traffic_controls total_accidents total_kills total_injuries
## Flash/Blinker :1 Min. : 6513 Min. : 2491 Min. : 5665
## Others :1 1st Qu.: 9126 1st Qu.: 2899 1st Qu.: 8413
## Police Controlled :1 Median : 13260 Median : 3708 Median : 11994
## Stop Signal :1 Mean : 77841 Mean : 25236 Mean : 78236
## TrafficSignal :1 3rd Qu.: 89031 3rd Qu.: 25884 3rd Qu.: 85125
## Uncontrolled Places:1 Max. :311975 Max. :105605 Max. :323044
# Pearson correlation between the accident and death totals.
cor(
  TRAFFIC_CONTROLS$total_accidents,
  TRAFFIC_CONTROLS$total_kills,
  method = "pearson"
)
## [1] 0.9985625
# Bar chart with one trace each for accidents, deaths and injuries,
# positioned by type of traffic control.
# NOTE(review): this assigns to `acc_bar`, the same name used for the
# traffic-violation bar chart earlier in the file.
acc_bar <- TRAFFIC_CONTROLS %>%
  plot_ly(x = ~traffic_controls) %>%
  add_trace(y = ~total_accidents, name = "ACCIDENTS", type = "bar") %>%
  add_trace(y = ~total_kills, name = "DEATHS", type = "bar") %>%
  add_trace(y = ~total_injuries, name = "INJURY", type = "bar")
acc_bar

# Share of total accidents per type of traffic control.
tc_acc_pie <- plot_ly(
  TRAFFIC_CONTROLS,
  labels = ~traffic_controls,
  values = ~total_accidents,
  type = "pie",
  textposition = "inside",
  textinfo = "label+percent"
)
tc_acc_pie

# Share of total deaths per type of traffic control.
tc_kill_pie <- plot_ly(
  TRAFFIC_CONTROLS,
  labels = ~traffic_controls,
  values = ~total_kills,
  type = "pie",
  textposition = "inside",
  textinfo = "label+percent"
)
tc_kill_pie

# Share of total injuries per type of traffic control.
tc_inj_pie <- plot_ly(
  TRAFFIC_CONTROLS,
  labels = ~traffic_controls,
  values = ~total_injuries,
  type = "pie",
  textposition = "inside",
  textinfo = "label+percent"
)
tc_inj_pie